EDA

Author

Daniel Chng, Leon Tan & Zoe Chia

Published

February 28, 2023

Modified

March 23, 2023

# Import Packages
pacman::p_load(sf, tmap, tidyverse, sfdep, readxl, Kendall, plotly, plyr)
# Import MPSZ
mpsz <- st_read(dsn = "data/geospatial", layer = "MPSZ-2019")
Reading layer `MPSZ-2019' from data source 
  `C:\wamp64\www\IS415-Project\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84
mpsz <- st_transform(mpsz, 3414)
write_rds(mpsz, "data/model/mpsz.rds")
# Import MRT Stations
mrt <- st_read(dsn = "data/geospatial", layer = "RapidTransitSystemStation")
Reading layer `RapidTransitSystemStation' from data source 
  `C:\wamp64\www\IS415-Project\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 220 features and 4 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 6068.209 ymin: 27478.44 xmax: 45377.5 ymax: 47913.58
Projected CRS: SVY21
mrt_sf <- st_transform(mrt, 3414)
mrt_sf <- mrt_sf %>%
  filter(str_detect(mrt_sf$STN_NAM_DE, "MRT STATION|LRT STATION"))
write_rds(mrt_sf, "data/model/mrt_sf.rds")
# MRT Rail Line
rail <- st_read(dsn = "data/geospatial", layer = "G_MP14_RAIL_LI")
Reading layer `G_MP14_RAIL_LI' from data source 
  `C:\wamp64\www\IS415-Project\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 1399 features and 9 fields (with 2 geometries empty)
Geometry type: MULTILINESTRING
Dimension:     XY
Bounding box:  xmin: 5950.856 ymin: 26030.91 xmax: 45473.16 ymax: 48252.23
Projected CRS: SVY21
rail_sf <- st_transform(rail, 3414)
# Tourism
tourism_sf <- st_read(dsn = "data/geospatial", layer = "TOURISM")
Reading layer `TOURISM' from data source 
  `C:\wamp64\www\IS415-Project\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 107 features and 19 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -1.797693e+308 ymin: -1.797693e+308 xmax: 43659.54 ymax: 47596.73
Projected CRS: SVY21
# Assign EPSG Code
tourism_sf <- st_transform(tourism_sf, 3414)
write_rds(tourism_sf, "data/model/tourism_sf.rds")
# Shopping Malls
shopping <- read.csv("data/geospatial/mall_coordinates.csv")

shopping <- shopping %>%
  select(name, latitude, longitude)

glimpse(shopping)
Rows: 199
Columns: 3
$ name      <chr> "100 AM", "i12 Katong", "313@SOMERSET", "321 CLEMENTI", "600…
$ latitude  <dbl> 1.274588, 1.305087, 1.301385, 1.312025, 1.334042, 1.437131, …
$ longitude <dbl> 103.8435, 103.9051, 103.8377, 103.7650, 103.8510, 103.7953, …
shopping_sf <- st_as_sf(shopping,
                              coords = c("longitude",
                                         "latitude"),
                              crs = 4326) %>%
  st_transform(crs = 3414)
write_rds(shopping_sf, "data/model/shopping_sf.rds")
# Hexagon for Map
hexagons <- st_read(dsn = "data/geospatial", layer = "hexagons")
Reading layer `hexagons' from data source 
  `C:\wamp64\www\IS415-Project\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 3125 features and 6 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 21506.33 xmax: 50010.26 ymax: 50256.33
Projected CRS: SVY21 / Singapore TM
hexagons_sf <- st_transform(hexagons, 3414)
write_rds(hexagons_sf, "data/model/hexagons_sf.rds")
# HDB Population by Geographical Location
hdb <- read.csv("data/aspatial/hdb-resident-population-by-geographical-distribution.csv")
# Filter out 2018 data
hdb <- filter(hdb, shs_year == "2018")
write_rds(hdb, "data/model/hdb.rds")
tmap_mode('view')
tmap_options(check.and.fix = TRUE) +
tm_shape(mpsz) +
  tm_polygons() +
tm_shape(mrt_sf) +
  tm_dots(alph=0.5, size=0.01)+
  tm_view(set.zoom.limits = c(11,14))
mpsz_hdb <- left_join(mpsz, hdb, by = c("PLN_AREA_N" = "town_estate"))
# Population Density
tmap_mode('view')
tm_shape(mpsz_hdb) +
  tm_fill("number")
# MRT vs Population Density
tmap_mode('view')
tm_shape(mpsz_hdb) +
  tm_fill("number") +
tm_shape(mrt_sf) +
  tm_dots(alph=0.5, size=0.01)+
  tm_view(set.zoom.limits = c(11,14))
# Population Data 2022
popdata <- read_csv("data/aspatial/respopagesexfa2022.csv")
popdata2022 <- popdata %>%
  spread(AG, Pop) %>%
  mutate(YOUNG = `0_to_4`+`5_to_9`+`10_to_14`+
`15_to_19`+`20_to_24`) %>%
mutate(`ECONOMY ACTIVE` = rowSums(.[9:13])+
rowSums(.[15:17]))%>%
mutate(`AGED`=rowSums(.[18:22])) %>%
mutate(`TOTAL`=rowSums(.[5:22])) %>%  
mutate(`DEPENDENCY` = (`YOUNG` + `AGED`)
/`ECONOMY ACTIVE`) %>%
mutate_at(.vars = vars(PA, SZ), toupper) %>%
select(`PA`, `SZ`, `YOUNG`, `ECONOMY ACTIVE`, `AGED`, 
       `TOTAL`, `DEPENDENCY`) %>%
  filter(`ECONOMY ACTIVE` > 0)
popdata2022 <- popdata2022 %>%
  mutate_at(.vars = vars(PA, SZ), 
          .funs = funs(toupper)) %>%
  filter(`ECONOMY ACTIVE` > 0)
write_rds(popdata2022, "data/model/popdata2022.rds")
mpsz_popdata2022 <- left_join(mpsz, popdata2022, by = c("SUBZONE_N" = "SZ"))
write_rds(mpsz_popdata2022, "data/model/mpsz_popdata2022.rds")
summary(mpsz_popdata2022$DEPENDENCY)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 0.0000  0.6786  0.8645  0.9583  1.0000 17.0000      99 
# MRT vs Population Density
tmap_mode('view')
tm_shape(mpsz_popdata2022) +
  tm_fill("DEPENDENCY",
          breaks = c(0, 0.60, 0.70, 0.80, 0.90, 1.00)) +
  tm_borders(lwd = 0.1,  alpha = 1) +
tm_shape(mrt_sf) +
  tm_dots(alph=0.5, size=0.07)+
  tm_view(set.zoom.limits = c(11,14)) +
tm_shape(rail_sf) +
  tm_lines(lty = "solid",
           scale = 1)